# Timing Matters: Climate Policies and Adaptive Resilience
# Data Cleaning and Preparation
# D'Amico & Maboudi (2025)

# ============================================================================
# SETUP
# ============================================================================

rm(list = ls())

# Required packages
library(countrycode)
library(data.table)
library(dplyr)
library(e1071)
library(mice)
library(plm)
library(readr)
library(readxl)
library(stringr)
library(tidyr)
library(vdemdata)
library(zoo)

# ============================================================================
# LOAD BASE DATA
# ============================================================================

# Load existing panel data
pdata <- read_csv("pdata_export.csv") %>%
  select(iso3c, year, everything())

# ============================================================================
# ADAPTIVE CAPACITY (ND-GAIN)
# ============================================================================

# Load ND-GAIN components
capacity <- read_csv("capacity.csv")
sensitivity <- read_csv("sensitivity.csv")
vulnerability <- read_csv("vulnerability.csv")

# Reshape to long format
capacity_long <- capacity %>%
  pivot_longer(cols = -c(ISO3, Name), names_to = "year", values_to = "capacity") %>%
  mutate(year = as.numeric(year))

sensitivity_long <- sensitivity %>%
  pivot_longer(cols = -c(ISO3, Name), names_to = "year", values_to = "sensitivity") %>%
  mutate(year = as.numeric(year))

vulnerability_long <- vulnerability %>%
  pivot_longer(cols = -c(ISO3, Name), names_to = "year", values_to = "vulnerability") %>%
  mutate(year = as.numeric(year))

# Merge ND-GAIN measures
new_dvs <- capacity_long %>%
  left_join(sensitivity_long, by = c("ISO3", "Name", "year")) %>%
  left_join(vulnerability_long, by = c("ISO3", "Name", "year")) %>%
  rename(iso3c = ISO3)

pdata <- pdata %>%
  left_join(new_dvs, by = c("iso3c", "year"))

# ============================================================================
# CLIMATE ADAPTATION LAWS (GRANTHAM INSTITUTE)
# ============================================================================

cclw <- read_excel("granthamCCLW.xlsx")

# Extract year from timeline
cclw <- cclw %>%
  mutate(year = as.numeric(str_sub(First_event_in_timeline, 1, 4)))

# Count adaptation laws by type
cl_domestic <- cclw %>%
  filter(Category != "UNFCCC", str_detect(`Topic/Response`, "Adaptation")) %>%
  group_by(Geography_ISO, year) %>%
  summarise(cl_domestic_count = n(), .groups = "drop")

adaptation_executive <- cclw %>%
  filter(Category == "Executive", str_detect(`Topic/Response`, "Adaptation")) %>%
  group_by(Geography_ISO, year) %>%
  summarise(adaptation_executive_count = n(), .groups = "drop")

adaptation_legislative <- cclw %>%
  filter(Category == "Legislative", str_detect(`Topic/Response`, "Adaptation")) %>%
  group_by(Geography_ISO, year) %>%
  summarise(adaptation_legislative_count = n(), .groups = "drop")

adaptation_plans <- cclw %>%
  filter(if_any(where(is.character), ~ str_detect(.x, "Adaptation Plan"))) %>%
  group_by(Geography_ISO, year) %>%
  summarise(adaptation_plans_count = n(), .groups = "drop")

adaptation_orig <- cclw %>%
  filter(str_detect(`Topic/Response`, "Adaptation")) %>%
  group_by(Geography_ISO, year) %>%
  summarise(adaptation_orig_count = n(), .groups = "drop")

mitigation_orig <- cclw %>%
  filter(str_detect(`Topic/Response`, "Mitigation")) %>%
  group_by(Geography_ISO, year) %>%
  summarise(mitigation_count = n(), .groups = "drop")

# Merge climate law counts
complete_grid <- cclw %>%
  select(Geography_ISO, year) %>%
  distinct() %>%
  filter(!is.na(Geography_ISO), !is.na(year))

climate_data <- complete_grid %>%
  left_join(cl_domestic, by = c("Geography_ISO", "year")) %>%
  left_join(adaptation_executive, by = c("Geography_ISO", "year")) %>%
  left_join(adaptation_legislative, by = c("Geography_ISO", "year")) %>%
  left_join(adaptation_plans, by = c("Geography_ISO", "year")) %>%
  left_join(adaptation_orig, by = c("Geography_ISO", "year")) %>%
  left_join(mitigation_orig, by = c("Geography_ISO", "year")) %>%
  rename(iso3c = Geography_ISO)

# Create cumulative stocks
climate_data <- climate_data %>%
  arrange(iso3c, year) %>%
  group_by(iso3c) %>%
  mutate(
    cl_domestic_stock = cumsum(coalesce(cl_domestic_count, 0)),
    adaptation_executive_stock = cumsum(coalesce(adaptation_executive_count, 0)),
    adaptation_legislative_stock = cumsum(coalesce(adaptation_legislative_count, 0)),
    adaptation_plans_stock = cumsum(coalesce(adaptation_plans_count, 0)),
    adaptation_orig_stock = cumsum(coalesce(adaptation_orig_count, 0)),
    mitigation_stock = cumsum(coalesce(mitigation_count, 0))
  ) %>%
  ungroup()

# Merge with main data
pdata$year <- as.numeric(as.character(pdata$year))
climate_data$year <- as.numeric(as.character(climate_data$year))

pdata <- pdata %>%
  left_join(climate_data, by = c("iso3c", "year")) %>%
  mutate(across(ends_with("_count"), ~ replace_na(.x, 0))) %>%
  arrange(iso3c, year) %>%
  group_by(iso3c) %>%
  fill(ends_with("_stock"), .direction = "down") %>%
  mutate(across(ends_with("_stock"), ~ replace_na(.x, 0))) %>%
  ungroup()

# ============================================================================
# POPULISM (V-PARTY DATA)
# ============================================================================

vparty <- vdemdata::vparty %>%
  mutate(iso3c = countrycode(country_id, origin = "vdem", destination = "iso3c")) %>%
  filter(!is.na(iso3c))

country_year_populism <- vparty %>%
  group_by(iso3c, year) %>%
  summarize(
    avg_populism = mean(v2xpa_popul, na.rm = TRUE),
    max_populism = ifelse(any(!is.na(v2xpa_popul)), max(v2xpa_popul, na.rm = TRUE), NA),
    .groups = 'drop'
  )

pdata <- pdata %>%
  left_join(country_year_populism, by = c("iso3c", "year")) %>%
  group_by(iso3c) %>%
  arrange(year) %>%
  fill(avg_populism, max_populism, .direction = "downup") %>%
  ungroup()

# ============================================================================
# CLIMATE DISASTERS (EM-DAT)
# ============================================================================

emdat <- read_excel("EMDAT.xlsx") %>%
  group_by(iso3c, year) %>%
  summarise(disaster_count = n(), .groups = "drop")

pdata <- pdata %>%
  left_join(emdat, by = c("iso3c", "year")) %>%
  mutate(disaster_count = replace_na(disaster_count, 0))

# ============================================================================
# INTERNATIONAL AGREEMENTS (IGES)
# ============================================================================

agmts_clim <- read_excel("IGES+NDC+Database_v7.7.xlsx", sheet = "Sheet1")

agmts_clim_reshaped <- agmts_clim %>%
  crossing(year = 1998:2025) %>%
  mutate(
    kyoto_sig = ifelse(!is.na(kyoto_sig_year) & year == kyoto_sig_year, 1, 0),
    kyoto_rat = ifelse(!is.na(kyoto_rat_year) & year == kyoto_rat_year, 1, 0),
    paris_sig = ifelse(!is.na(paris_sig_year) & year == paris_sig_year, 1, 0),
    paris_rat = ifelse(!is.na(paris_rat_year) & year == paris_rat_year, 1, 0),
    ndc_sub = ifelse(!is.na(ndc_sub_year) & year == ndc_sub_year, 1, 0)
  ) %>%
  select(country, year, kyoto_sig, kyoto_rat, paris_sig, paris_rat, ndc_sub, if_adaptation) %>%
  arrange(country, year) %>%
  mutate(iso3c = countrycode(country, origin = "country.name", destination = "iso3c"))

pdata <- pdata %>%
  left_join(agmts_clim_reshaped, by = c("iso3c", "year")) %>%
  mutate(across(c(kyoto_sig, kyoto_rat, paris_sig, paris_rat, ndc_sub, if_adaptation), ~ replace_na(.x, 0)))

# ============================================================================
# ECONOMIC SHOCKS
# ============================================================================

pdata <- pdata %>%
  mutate(
    ifparis_bin = if_else(year >= 2016, 1, 0),
    financial_recession = ifelse(year >= 2007 & year <= 2009, 1, 0)
  )

# ============================================================================
# ADAPTATION AID (OECD RIO MARKERS)
# ============================================================================

oecd_bilcom <- read_csv("oecd_bilcom.csv") %>%
  group_by(iso3c, year) %>%
  summarise(adaptationaid_recipient = sum(adaptationaid_recipient, na.rm = TRUE), .groups = "drop")

pdata <- pdata %>%
  left_join(oecd_bilcom, by = c("iso3c", "year")) %>%
  mutate(adaptationaid_recipient = replace_na(adaptationaid_recipient, 0))

# ============================================================================
# ENVIRONMENTAL EXPENDITURE (IMF)
# ============================================================================

envexp <- read_excel("EnvExp_Indicator_14_Expenditure_on_Environmental_Protection_4117694454459396340.xlsx")

envexp_long <- envexp %>%
  pivot_longer(cols = matches("^[0-9]{4}$"), names_to = "year", values_to = "env_exp") %>%
  mutate(
    year = as.numeric(year),
    iso3c = countrycode(Country, origin = "country.name", destination = "iso3c")
  ) %>%
  filter(!is.na(iso3c)) %>%
  select(iso3c, year, env_exp)

pdata <- pdata %>%
  left_join(envexp_long, by = c("iso3c", "year")) %>%
  group_by(iso3c) %>%
  arrange(year) %>%
  fill(env_exp, .direction = "downup") %>%
  ungroup()

# Calculate as % of GDP
pdata <- pdata %>%
  mutate(envexp_pgdp = ifelse(!is.na(env_exp) & !is.na(gdp), 
                               (env_exp / gdp) * 100, NA_real_))

# ============================================================================
# IMPUTATION
# ============================================================================

set.seed(1995)

impute_vars <- c("capacity", "sensitivity", "vulnerability", "ave_readiness", 
                 "max_populism", "gdpgrowth_per")
predictors <- c("readiness", "economic_readiness", "governance_readiness", 
                "adaptation_law_stock", "ROL_perc", "ln_gdp", "ln_gdp_square", 
                "percent_gdp_services", "imports_pGDP", "auton", "clim_vulnerability", 
                "sum_readiness", "global_north", "disaster_count", "ifparis_bin", 
                "kyoto_sig", "kyoto_rat", "paris_sig", "paris_rat", "ndc_sub", 
                "if_adaptation", "financial_recession", "adaptationaid_recipient", 
                "envexp_pgdp")

pdata_imputed <- pdata %>%
  group_by(iso3c) %>%
  group_split() %>%
  purrr::map_df(function(df) {
    df_sub <- df %>% select(all_of(c(impute_vars, predictors)))
    imp <- mice(df_sub, m = 1, maxit = 5, printFlag = FALSE)
    comp <- complete(imp, 1)
    df %>%
      select(-all_of(impute_vars)) %>%
      bind_cols(comp %>% select(all_of(impute_vars)))
  })

pdata <- pdata_imputed

# ============================================================================
# LOG TRANSFORMATIONS
# ============================================================================

# Identify highly skewed variables
numeric_vars <- pdata %>% select(where(is.numeric))
skewness_df <- numeric_vars %>%
  summarise(across(everything(), ~ skewness(.x, na.rm = TRUE))) %>%
  pivot_longer(cols = everything(), names_to = "variable", values_to = "skewness") %>%
  filter(abs(skewness) > 1)

# Apply log transformations
pdata <- pdata %>%
  mutate(
    adaptationaid_recipient = log(pmax(adaptationaid_recipient, 0) + 1e-6),
    envexp_pgdp = log(pmax(envexp_pgdp, 0) + 1e-6),
    disaster_count = log(pmax(disaster_count, 0) + 1e-6),
    adaptation_law_stock = log(pmax(adaptation_law_stock, 0) + 1e-6),
    imports_pGDP = log(pmax(imports_pGDP, 0) + 1e-6),
    gdpgrowth_per = log(pmax(gdpgrowth_per, 0) + 1e-6)
  )

# ============================================================================
# CREATE LAGS
# ============================================================================

pdata <- as.data.table(pdata)
pdata[, year := as.numeric(as.character(year))]
setorder(pdata, iso3c, year)

# Lagged dependent variables
pdata[, lag1_capacity := shift(capacity, n = 1, type = "lag"), by = iso3c]
pdata[, lag3_capacity := shift(capacity, n = 3, type = "lag"), by = iso3c]
pdata[, lag5_capacity := shift(capacity, n = 5, type = "lag"), by = iso3c]
pdata[, lag7_capacity := shift(capacity, n = 7, type = "lag"), by = iso3c]

pdata[, lag1_sensitivity := shift(sensitivity, n = 1, type = "lag"), by = iso3c]
pdata[, lag3_sensitivity := shift(sensitivity, n = 3, type = "lag"), by = iso3c]
pdata[, lag5_sensitivity := shift(sensitivity, n = 5, type = "lag"), by = iso3c]
pdata[, lag7_sensitivity := shift(sensitivity, n = 7, type = "lag"), by = iso3c]

pdata[, lag1_vulnerability := shift(vulnerability, n = 1, type = "lag"), by = iso3c]
pdata[, lag3_vulnerability := shift(vulnerability, n = 3, type = "lag"), by = iso3c]
pdata[, lag5_vulnerability := shift(vulnerability, n = 5, type = "lag"), by = iso3c]
pdata[, lag7_vulnerability := shift(vulnerability, n = 7, type = "lag"), by = iso3c]

# Convert to panel data frame
pdata <- pdata.frame(pdata, index = c("iso3c", "year")) %>%
  mutate(year = as.factor(year))

# ============================================================================
# SAVE CLEANED DATA
# ============================================================================

write.csv(pdata, "pdata_clean.csv", row.names = FALSE)

cat("Data cleaning complete. Output saved to pdata_clean.csv\n")
